rm(list = ls())
library(data.table)
library(plyr)
library(tidyr)
library(lfe)
library(stargazer)
library(xtable)
library(sandwich)
library(roll)
library(readxl)
library(readr)
library(zoo)
library(texreg)
library(DescTools)

m <- data.table(read_xlsx("../Data/MasterData.xlsx", skip = 1, guess_max = 1e4))
m[, age := 2021 - year]

m[, memrableperiod_ownret := memrableperiod_ownret * 100]
m[, memrableperiod_pret := memrableperiod_pret * 100]
m[, memrableperiod_aret := memrableperiod_aret * 100]
m[, bias := (memrableperiod_pret - memrableperiod_aret)]

m[, retrecall_1day_rate := retrecall_1day_rate * 100]
m[, retrecall_30day_rate := retrecall_30day_rate * 100]
m[, retrecall_1year_rate := retrecall_1year_rate * 100]
m[, retrecall_5year_rate := retrecall_5year_rate * 100]

m[, expret30day_market_rate := expret30day_market_rate * 100]
m[, expret1year_market_rate := expret1year_market_rate * 100]
m[, expret30day_self_rate := expret30day_self_rate * 100]
m[, expret1year_self_rate := expret1year_self_rate * 100]


m[, memrableperiod_begin := as.yearmon(memrableperiod_begin, "%Ym%m")]
m[, memrableperiod_end := as.yearmon(memrableperiod_end, "%Ym%m")]
m[, memrableperiod_dist := as.yearmon(2022 + 2 / 12) - (memrableperiod_begin + memrableperiod_end) / 2]

C <- data.table(read_xlsx("../Data/AccountData.xlsx", guess_max = 1e4))

# variable list
C[, max_invest_ytd := as.numeric(max_invest_ytd) / 1e3]
C[, monthly_turnover_tsa := as.numeric(monthly_turnover_tsa)]
C[, monthly_raw_return_tsa := as.numeric(monthly_raw_return_tsa) * 100]
C[, monthly_net_return_tsa := as.numeric(monthly_net_return_tsa) * 100]
C[, beta_month_hold_tsa := as.numeric(beta_month_hold_tsa)]
C[, bm_month_hold_tsa := as.numeric(bm_month_hold_tsa)]
C[, mpret_month_hold_tsa := as.numeric(mpret_month_hold_tsa) * 100]
C[, mvol_month_hold_tsa := as.numeric(mvol_month_hold_tsa) * 100]

m <- merge(m, C,
    by = "id", all.x = T
)

m[, max_invest_ytd := Winsorize(max_invest_ytd, quantile(max_invest_ytd, probs = c(0.05, 0.95), na.rm = T))]
m[, monthly_turnover_tsa := Winsorize(monthly_turnover_tsa, quantile(monthly_turnover_tsa, probs = c(0.05, 0.95), na.rm = T))]
m[, monthly_raw_return_tsa := Winsorize(monthly_raw_return_tsa, quantile(monthly_raw_return_tsa, probs = c(0.05, 0.95), na.rm = T))]
m[, monthly_net_return_tsa := Winsorize(monthly_net_return_tsa, quantile(monthly_net_return_tsa, probs = c(0.05, 0.95), na.rm = T))]
m[, beta_month_hold_tsa := Winsorize(beta_month_hold_tsa, quantile(beta_month_hold_tsa, probs = c(0.05, 0.95), na.rm = T))]
m[, bm_month_hold_tsa := Winsorize(bm_month_hold_tsa, quantile(bm_month_hold_tsa, probs = c(0.05, 0.95), na.rm = T))]
m[, mpret_month_hold_tsa := Winsorize(mpret_month_hold_tsa, quantile(mpret_month_hold_tsa, probs = c(0.05, 0.95), na.rm = T))]
m[, mvol_month_hold_tsa := Winsorize(mvol_month_hold_tsa, quantile(mvol_month_hold_tsa, probs = c(0.05, 0.95), na.rm = T))]

m[, monthly_turnover_tsa := monthly_turnover_tsa * 100]

###########################################################################
# summary stats
summ <- NULL

summ <- rbind(summ, m[!is.na(max_invest_ytd), {
    list(
        var = "Max value of investment (K RMB)",
        N = .N,
        mean = mean(max_invest_ytd),
        sd = sd(max_invest_ytd),
        p5 = quantile(max_invest_ytd, 0.05),
        p25 = quantile(max_invest_ytd, 0.25),
        p50 = median(max_invest_ytd),
        p75 = quantile(max_invest_ytd, 0.75),
        p95 = quantile(max_invest_ytd, 0.95)
    )
}])

summ <- rbind(summ, m[!is.na(monthly_turnover_tsa), {
    list(
        var = "Monthly turnover (%)",
        N = .N,
        mean = mean(monthly_turnover_tsa),
        sd = sd(monthly_turnover_tsa),
        p5 = quantile(monthly_turnover_tsa, 0.05),
        p25 = quantile(monthly_turnover_tsa, 0.25),
        p50 = median(monthly_turnover_tsa),
        p75 = quantile(monthly_turnover_tsa, 0.75),
        p95 = quantile(monthly_turnover_tsa, 0.95)
    )
}])

summ <- rbind(summ, m[!is.na(monthly_raw_return_tsa), {
    list(
        var = "Monthly raw return (%)",
        N = .N,
        mean = mean(monthly_raw_return_tsa),
        sd = sd(monthly_raw_return_tsa),
        p5 = quantile(monthly_raw_return_tsa, 0.05),
        p25 = quantile(monthly_raw_return_tsa, 0.25),
        p50 = median(monthly_raw_return_tsa),
        p75 = quantile(monthly_raw_return_tsa, 0.75),
        p95 = quantile(monthly_raw_return_tsa, 0.95)
    )
}])

summ <- rbind(summ, m[!is.na(monthly_net_return_tsa), {
    list(
        var = "Monthly net return (%)",
        N = .N,
        mean = mean(monthly_net_return_tsa),
        sd = sd(monthly_net_return_tsa),
        p5 = quantile(monthly_net_return_tsa, 0.05),
        p25 = quantile(monthly_net_return_tsa, 0.25),
        p50 = median(monthly_net_return_tsa),
        p75 = quantile(monthly_net_return_tsa, 0.75),
        p95 = quantile(monthly_net_return_tsa, 0.95)
    )
}])

summ <- rbind(summ, m[!is.na(beta_month_hold_tsa), {
    list(
        var = "Market beta",
        N = .N,
        mean = mean(beta_month_hold_tsa),
        sd = sd(beta_month_hold_tsa),
        p5 = quantile(beta_month_hold_tsa, 0.05),
        p25 = quantile(beta_month_hold_tsa, 0.25),
        p50 = median(beta_month_hold_tsa),
        p75 = quantile(beta_month_hold_tsa, 0.75),
        p95 = quantile(beta_month_hold_tsa, 0.95)
    )
}])

summ <- rbind(summ, m[!is.na(bm_month_hold_tsa), {
    list(
        var = "Book-to-market ratio",
        N = .N,
        mean = mean(bm_month_hold_tsa),
        sd = sd(bm_month_hold_tsa),
        p5 = quantile(bm_month_hold_tsa, 0.05),
        p25 = quantile(bm_month_hold_tsa, 0.25),
        p50 = median(bm_month_hold_tsa),
        p75 = quantile(bm_month_hold_tsa, 0.75),
        p95 = quantile(bm_month_hold_tsa, 0.95)
    )
}])

summ <- rbind(summ, m[!is.na(mpret_month_hold_tsa), {
    list(
        var = "Past 30-day return (%)",
        N = .N,
        mean = mean(mpret_month_hold_tsa),
        sd = sd(mpret_month_hold_tsa),
        p5 = quantile(mpret_month_hold_tsa, 0.05),
        p25 = quantile(mpret_month_hold_tsa, 0.25),
        p50 = median(mpret_month_hold_tsa),
        p75 = quantile(mpret_month_hold_tsa, 0.75),
        p95 = quantile(mpret_month_hold_tsa, 0.95)
    )
}])

summ <- rbind(summ, m[!is.na(mvol_month_hold_tsa), {
    list(
        var = "Return volatility (%)",
        N = .N,
        mean = mean(mvol_month_hold_tsa),
        sd = sd(mvol_month_hold_tsa),
        p5 = quantile(mvol_month_hold_tsa, 0.05),
        p25 = quantile(mvol_month_hold_tsa, 0.25),
        p50 = median(mvol_month_hold_tsa),
        p75 = quantile(mvol_month_hold_tsa, 0.75),
        p95 = quantile(mvol_month_hold_tsa, 0.95)
    )
}])

print.xtable(xtable(summ[1:2], digits = 0), include.rownames = F)
print.xtable(xtable(summ[3:8], digits = 2), include.rownames = F)
